import os
import pandas as pd

# Directory whose CSV files are merged.
folder_path = '知识产权补/软著/'

# Name of the merged result file; it is written into ``folder_path``.
output_filename = '软著_result.csv'

# Encodings tried in order when reading a CSV file.
_ENCODINGS = ('utf-8', 'gbk', 'gb18030')


def _read_csv_with_fallback(file_path):
    """Read one CSV file, trying each encoding in ``_ENCODINGS`` in turn.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        The parsed ``pandas.DataFrame``.

    Raises:
        UnicodeDecodeError: If the file cannot be decoded with the last
            encoding either.
    """
    for encoding in _ENCODINGS[:-1]:
        try:
            return pd.read_csv(file_path, encoding=encoding)
        except UnicodeDecodeError:
            continue
    # Last attempt: let a decode failure propagate to the caller.
    return pd.read_csv(file_path, encoding=_ENCODINGS[-1])


def merge_csv_folder(folder, output_name):
    """Merge every ``.csv`` file in *folder*, drop duplicate rows, and save.

    Args:
        folder: Directory to scan (not recursive).
        output_name: File name of the merged result, created inside *folder*.

    Returns:
        The path of the written result file, or ``None`` when no CSV data
        was found and nothing was written.
    """
    output_path = os.path.join(folder, output_name)
    frames = []

    # Sorted so that row order in the result is reproducible; os.listdir
    # returns entries in arbitrary order.
    for filename in sorted(os.listdir(folder)):
        if not filename.endswith('.csv'):
            continue
        # The result of a previous run lives in the same folder; reading it
        # back would carry stale rows into the new result.
        if filename == output_name:
            continue

        file_path = os.path.join(folder, filename)
        print(f'正在读取: {file_path}')
        try:
            frames.append(_read_csv_with_fallback(file_path))
        except pd.errors.EmptyDataError:
            # A zero-byte file has nothing to contribute; keep going.
            print(f'跳过空文件: {file_path}')

    # pd.concat raises ValueError on an empty list, so bail out explicitly.
    if not frames:
        print(f'未找到可合并的CSV文件: {folder}')
        return None

    dedup_df = pd.concat(frames, ignore_index=True).drop_duplicates()

    # utf-8-sig writes a BOM so that Excel detects the encoding correctly.
    dedup_df.to_csv(output_path, index=False, encoding='utf-8-sig')
    print(f'合并并去重后的数据已保存到: {output_path}')
    return output_path


if __name__ == '__main__':
    merge_csv_folder(folder_path, output_filename)